#!/usr/bin/env python
# coding: utf-8

# In[14]:


import os
# Work from the project's data directory so the relative "raw\..." paths used
# further down resolve against it.
# Raw string: every backslash in the Windows path is taken literally (a plain
# "\g" inside a normal string literal is an invalid escape sequence).
os.chdir(r"C:\Users\gmaze\Documents\GitHub\job_insecurity\data\other")


# In[15]:


from pyscopus import Scopus
import json
import pandas as pd


# In[16]:


# Scopus API client.
# The key is taken from the SCOPUS_API_KEY environment variable when it is set
# and non-empty; otherwise the literal below is used so existing runs keep
# working unchanged.
# NOTE(review): the fallback key is committed in plain text -- consider
# revoking it and dropping the fallback once the environment variable is in use.
key = os.environ.get("SCOPUS_API_KEY") or '4253ffae4ba04224c857e3d71d3eccbe'
scopus = Scopus(key)


# In[21]:


# Grab abstracts
#
# Run the Scopus search, then request the abstract of every hit.
# `abstracts` ends up with exactly one entry per id, in the same order as
# `ids`, so the two can be paired positionally afterwards.

search_df = scopus.search("( TITLE-ABS-KEY ( \"job security\"  OR  \"job insecurity\" ) )  AND  SUBJAREA ( arts  OR  busi  OR  deci  OR  econ  OR  psyc  OR  soci )", count=3979)

ids = search_df['scopus_id']
abstracts = []

for scopus_id in ids:
    try:
        # NOTE(review): the second argument './' is presumably a download
        # directory for the retrieved record -- confirm against pyscopus.
        pub_info = scopus.retrieve_abstract(f'{scopus_id}', './')
        abstract = pub_info['abstract']
    except (ValueError, KeyError):
        # A failed retrieval (ValueError) or a record without an 'abstract'
        # entry (KeyError) must not abort the whole run: store the placeholder
        # "a" so positions stay aligned with `ids`. KeyError is handled the
        # same way as in the full-text loop.
        abstract = "a"
    abstracts.append(abstract)


# In[1]:


# Pair each Scopus id with its abstract (one row per document) and keep a
# pickled copy of the resulting table.

rows = [(doc_id, text) for doc_id, text in zip(ids, abstracts)]
df = pd.DataFrame(rows, columns=["id", "abstract"])
df.to_pickle(path="raw\\abstracts.pkl")


# In[1]:


# Export the id/abstract table to an Excel workbook; index=False leaves the
# DataFrame index out of the sheet.

df.to_excel(excel_writer="raw\\abstracts.xlsx", index=False)


# In[66]:


# Fetch the full text for every search hit where it can be retrieved.
# `full_text` gets one entry per row of `search_df`; rows whose retrieval
# raises ValueError or KeyError get the placeholder "a" instead.

text_links = search_df['full_text']
full_text = []
for link in text_links:
    try:
        body = scopus.retrieve_full_text(link)
    except (ValueError, KeyError):
        body = "a"
    full_text.append(body)


# In[134]:


# Put together ids and full texts
#
# `full_text` holds one entry per search hit, in the same order as `ids`, so
# the two are combined positionally into a two-column table. The frame is
# built directly: `full_text` is a plain list, which pd.merge does not accept.
# NOTE(review): the column name "full_text" is a choice made here -- adjust it
# if downstream consumers expect a different header.

texts = pd.DataFrame({"id": list(ids), "full_text": full_text})


# In[123]:


# Export the id/full-text table to "text.xlsx" in the working directory
# (the DataFrame index is written too, as no index argument is given).
# NOTE(review): Excel limits a cell to 32,767 characters -- verify that long
# full texts survive this export.

texts.to_excel(excel_writer="text.xlsx")


# In[136]:


# Join the saved id/abstract workbook with the id/full-text table on "id".
# Note that `abstracts` is rebound here to the DataFrame read back from disk.
abstracts = pd.read_excel("raw\\abstracts.xlsx")
# Cast the join key to int64 before merging; presumably read_excel parses the
# ids as integers, so both sides need the same dtype -- confirm.
texts = texts.astype({"id": "int64"})
merged = abstracts.merge(texts, on="id")


# In[138]:


# Write the combined id / abstract / full-text table to an Excel workbook
# (the DataFrame index is included, as no index argument is given).

merged.to_excel(excel_writer="raw\\abstracts_full_texts.xlsx")

